Cadences#

Hide imports
import os
from collections import defaultdict, Counter

from git import Repo
import dimcat as dc
import ms3
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from utils import STD_LAYOUT, CADENCE_COLORS, color_background, value_count_df, get_repo_name, print_heading, resolve_dir
Hide source
# Default corpus location: two directory levels above the current working directory.
CORPUS_PATH = os.path.abspath(os.path.join('..', '..'))
print_heading("Notebook settings")
print(f"CORPUS_PATH: {CORPUS_PATH!r}")
# resolve_dir is a helper from the local utils module.
# NOTE(review): presumably it normalises/validates the directory -- confirm in utils.
CORPUS_PATH = resolve_dir(CORPUS_PATH)
Notebook settings
-----------------

CORPUS_PATH: '/home/runner/work/workflow_deployment/distant_listening_corpus'
Hide source
# Open the corpus directory as a git repository so the data version can be reported.
repo = Repo(CORPUS_PATH)
print_heading("Data and software versions")
# Only the first seven characters of the current commit hash are printed.
print(f"Data repo '{get_repo_name(repo)}' @ {repo.commit().hexsha[:7]}")
print(f"dimcat version {dc.__version__}")
print(f"ms3 version {ms3.__version__}")
Data and software versions
--------------------------

Data repo 'distant_listening_corpus' @ c964ccc
dimcat version 0.3.0
ms3 version 2.2.2
# Create an empty dimcat Dataset and load everything found under CORPUS_PATH.
dataset = dc.Dataset()
# NOTE(review): parse_tsv=False presumably defers parsing of the TSV files -- confirm against dimcat docs.
dataset.load(directory=CORPUS_PATH, parse_tsv=False)
[annotated|all|default]
All corpora
-----------
View: This view is called 'annotated'. It 
	- excludes pieces that are not contained in the metadata,
	- filters out file extensions requiring conversion (such as .xml),
	- excludes review files and folders, and
	- includes only facets containing 'expanded'.

                                      has     active expanded       
                                 metadata       view detected parsed
corpus                                                              
ABC                                   yes  annotated       70     70
bach_en_fr_suites                     yes  annotated       89     89
bach_solo                             yes  annotated       68     68
bartok_bagatelles                     yes  annotated       14     14
beethoven_piano_sonatas               yes  annotated       64     64
c_schumann_lieder                     yes  annotated       12     12
chopin_mazurkas                       yes  annotated       55     55
corelli                               yes  annotated      149    149
couperin_clavecin                     yes  annotated        9      9
couperin_concerts                     yes  annotated       84     84
debussy_suite_bergamasque             yes  annotated        4      4
dvorak_silhouettes                    yes  annotated       12     12
frescobaldi_fiori_musicali            yes  annotated       48     48
grieg_lyric_pieces                    yes  annotated       66     66
handel_keyboard                       yes  annotated        6      6
jc_bach_sonatas                       yes  annotated       29     29
kleine_geistliche_konzerte            yes  annotated       55     55
kozeluh_sonatas                       yes  annotated       48     48
liszt_pelerinage                      yes  annotated       19     19
mahler_kindertotenlieder              yes  annotated        5      5
medtner_tales                         yes  annotated       19     19
mendelssohn_quartets                  yes  annotated       24     24
monteverdi_madrigals                  yes  annotated       27     27
mozart_piano_sonatas                  yes  annotated       54     54
pergolesi_stabat_mater                yes  annotated        7      7
peri_euridice                         yes  annotated        6      6
pleyel_quartets                       yes  annotated        6      6
poulenc_mouvements_perpetuels         yes  annotated        3      3
rachmaninoff_piano                    yes  annotated       19     19
ravel_piano                           yes  annotated        3      3
scarlatti_sonatas                     yes  annotated       69     69
schubert_winterreise                  yes  annotated       24     24
schulhoff_suite_dansante_en_jazz      yes  annotated        6      6
schumann_kinderszenen                 yes  annotated       13     13
schumann_liederkreis                  yes  annotated       12     12
sweelinck_keyboard                    yes  annotated        1      1
tchaikovsky_seasons                   yes  annotated       12     12
wagner_overtures                      yes  annotated        2      2
wf_bach_sonatas                       yes  annotated        9      9
boccherini_ensemble                   yes  annotated        0      0
cpe_bach_keyboard                     yes  annotated        0      0
debussy_childrens_corner              yes  annotated        0      0
debussy_deux_arabesques               yes  annotated        0      0
debussy_estampes                      yes  annotated        0      0
debussy_etudes                        yes  annotated        0      0
debussy_images                        yes  annotated        0      0
debussy_other_piano_pieces            yes  annotated        0      0
debussy_pour_le_piano                 yes  annotated        0      0
debussy_preludes                      yes  annotated        0      0
platti_sonatas                        yes  annotated        0      0

11/12 facets are excluded from this view.


There are 1 orphans that could not be attributed to any of the respective corpus's pieces.
N = 1222 annotated pieces, 1222 parsed dataframes.

Metadata#

# Metadata of all loaded pieces in one table; abort early if nothing was loaded.
all_metadata = dataset.data.metadata()
assert len(all_metadata) > 0, "No pieces selected for analysis."
print(f"Concatenated 'metadata.tsv' files cover {len(all_metadata)} of the {dataset.data.count_pieces()} scores.")
# Preview: the first piece of every corpus (index level 0), restricted to the first 20 columns.
all_metadata.reset_index(level=1).groupby(level=0).nth(0).iloc[:,:20]
Concatenated 'metadata.tsv' files cover 1222 of the 1222 scores.
piece TimeSig KeySig last_mc last_mn length_qb last_mc_unfolded last_mn_unfolded length_qb_unfolded volta_mcs all_notes_qb n_onsets n_onset_positions guitar_chord_count form_label_count label_count annotated_key harmony_version annotators reviewers
corpus
ABC n01op18-1_01 1: 3/4 1: -1 313.0 313.0 939.00 427.0 427.0 1281.00 3132.75 4589.0 1950.0 0.0 0.0 405.0 F 1.0.0 Markus Neuwirth NaN
bach_en_fr_suites BWV806_01_Prelude 1: 12/8 1: 3 37.0 37.0 222.00 74.0 74.0 444.00 673.25 774.0 462.0 0.0 0.0 191.0 A 2.3.0 Adrian Nagel (2.1.0), Davor Krkljus (2.3.0) EMF, JH, DK
bach_solo BWV1001_01_Adagio 1: 4/4 1: -1 22.0 22.0 88.00 22.0 22.0 88.00 157.50 526.0 420.0 0.0 0.0 95.0 g 2.3.0 Adrian Nagel NaN
bartok_bagatelles op06n01 1: 4/4 1: 4 18.0 18.0 72.00 18.0 18.0 72.00 NaN 121.50 135.0 109.0 0.0 0.0 25.0 c# 2.3.0 Amelia Brey JH
beethoven_piano_sonatas 01-1 1: 2/2 1: -4 154.0 152.0 608.00 308.0 304.0 1216.00 1476.00 1679.0 985.0 0.0 0.0 241.0 f 2.3.0 Lars & Ya-Chuan (2.2.0), John Heilig (2.3.0) AN
c_schumann_lieder op13no1 Ich stand in dunklen Traumen 1: 3/4 1: -3 37.0 37.0 111.00 37.0 37.0 111.00 552.50 931.0 225.0 0.0 0.0 103.0 Eb 2.3.0 Adrian Nagel AB
chopin_mazurkas BI105-2op30-2 1: 3/4 1: 2 65.0 64.0 193.00 65.0 64.0 193.00 711.00 810.0 274.0 0.0 0.0 116.0 b 2.3.0 Wendelin Bitzan (1.0.0), Adrian Nagel (2.2.0),... JH, AN, DK
corelli op01n01a 1: 4/4 1: -1 14.0 14.0 56.00 14.0 14.0 56.00 224.00 280.0 110.0 0.0 0.0 64.0 F 2.3.0 Lars Opfermann, Ya-Chuan Wu (2.1.1), Hanné Bec... HB, JH
couperin_clavecin 00_allemande 1: 4/4 1: -1 15.0 13.0 52.00 30.0 26.0 104.00 101.75 322.0 210.0 0.0 0.0 66.0 d 2.3.0 Adrian Nagel (2.1.0), Davor Krkljus (2.3.0) DK, Hanné Becker
couperin_concerts c01n01_prelude 1: 4/4 1: 1 25.0 23.0 98.00 25.0 23.0 98.00 219.00 386.0 251.0 0.0 0.0 93.0 G 2.1.0 Eva-Maria Hamberger Johannes Menke
debussy_suite_bergamasque l075-01_suite_prelude 1: 4/4 1: -1 89.0 89.0 356.00 89.0 89.0 356.00 1533.67 1721.0 870.0 0.0 0.0 274.0 F 2.3.0 Adrian Nagel (2.1.1), Amelia Brey (2.3.0) AB, AN
dvorak_silhouettes op08n01 1: 6/8 1: 4, 7: -5, 49: 4 54.0 52.0 156.50 54.0 52.0 156.50 658.75 957.0 288.0 0.0 0.0 80.0 c# 2.3.0 Daniel Grote (2.1.1), Hanné Becker (2.3.0) Johannes Hentschel (2.1.1), AN
frescobaldi_fiori_musicali 12.01_Toccata_avanti_la_Messa_della_Domenica 1: 4/2 1: 0 8.0 8.0 64.00 8.0 8.0 64.00 NaN 244.00 200.0 121.0 0.0 0.0 57.0 d NaN NaN NaN
grieg_lyric_pieces op12n01 1: 2/4 1: -3 23.0 23.0 46.00 23.0 23.0 46.00 135.50 268.0 156.0 0.0 0.0 43.0 Eb 2.3.0 Adrian Nagel (2.1.1), John Heilig (2.30) Adrian Nagel
handel_keyboard hwv430d_Grobschmied_Aria 1: 4/4 1: 4 10.0 9.0 33.00 15.0 13.0 49.00 118.00 213.0 85.0 0.0 0.0 51.0 E 2.3.0 Adrian Nagel (2.1.0), Davor Krkljus (2.3.0) DK
jc_bach_sonatas wa01op05no1a_Allegretto 1: 2/4 1: -2 84.0 82.0 166.00 168.0 164.0 332.00 313.00 896.0 638.0 0.0 0.0 120.0 Bb 2.3.0 Adrian Nagel (2.1.1.), Ehsan Mohagheghi Fard (... AN
kleine_geistliche_konzerte op08n01swv282_Eile_mich,_Gott,_zu_erretten 1: 4/4 1: 0 68.0 68.0 272.00 68.0 68.0 272.00 NaN 507.50 315.0 251.0 0.0 0.0 84.0 d 2.1.1 Adrian Nagel NaN
kozeluh_sonatas 09op08no1a 1: 6/8 1: 0 135.0 135.0 405.00 194.0 194.0 582.00 NaN 960.00 2047.0 1351.0 0.0 0.0 272.0 C 2.1.0 Adrian Nagel NaN
liszt_pelerinage 160.01_Chapelle_de_Guillaume_Tell 1: 4/4 1: 0 97.0 97.0 388.00 97.0 97.0 388.00 1902.42 2879.0 1069.0 0.0 0.0 174.0 C 2.3.0 Adrian Nagel (2.1.1), Amelia Brey (2.3.0) Johannes Hentschel (1-33 & 82-97), AB, AN
mahler_kindertotenlieder kindertotenlieder_01_nun_will_die_sonn 1: 4/4 1: -1 85.0 84.0 337.00 85.0 84.0 337.00 1064.50 989.0 532.0 0.0 0.0 179.0 d 2.3.0 Amelia Brey DK
medtner_tales op08n01 1: 4/8 1: -3 81.0 81.0 162.00 81.0 81.0 162.00 603.00 1481.0 528.0 0.0 0.0 213.0 c 2.3.0 Wendelin Bitzan (2.2.0), John Heilig (2.3.0) Adrian Nagel, DK
mendelssohn_quartets 01op12a 1: 4/4 1: -3 294.0 292.0 1168.00 294.0 292.0 1168.00 4329.00 3638.0 1702.0 0.0 0.0 673.0 Eb 2.1.0 Adrian Nagel NaN
monteverdi_madrigals 2-12 1: 4/4 1: -1 93.0 93.0 372.00 93.0 93.0 372.00 NaN 1374.00 1011.0 454.0 0.0 0.0 225.0 F 2.1.0 Adrian Nagel NaN
mozart_piano_sonatas K279-1 1: 4/4 1: 0 100.0 100.0 400.00 200.0 200.0 800.00 767.00 2031.0 1441.0 0.0 0.0 251.0 C NaN Uli Kneisel Johannes Hentschel, Markus Neuwirth
pergolesi_stabat_mater 01. Stabat Mater dolorosa 1: 4/4 1: -4 47.0 47.0 188.00 47.0 47.0 188.00 NaN 882.50 1068.0 368.0 7.0 0.0 166.0 f 2.2.0 Uli Kneisel NaN
peri_euridice peri_euridice_scene_0 1: 4/2 1: -1 15.0 14.0 120.00 15.0 14.0 120.00 NaN 220.50 103.0 70.0 0.0 0.0 32.0 F 2.3.0 Davor Krkljus ST
pleyel_quartets b307op2n1a 1: 4/4 1: 3 199.0 197.0 793.00 284.0 283.0 1133.00 [[[87], [88]]] 2694.00 3643.0 1604.0 0.0 0.0 403.0 A 2.3.0 Adrian Nagel (2.1.0), Davor Krkljus (2.3.0) DK, AN
poulenc_mouvements_perpetuels 01_assez_modere 1: 4/4 1: 0 24.0 24.0 96.00 43.0 43.0 172.00 246.00 368.0 191.0 0.0 0.0 93.0 Bb 2.3.0 Amelia Brey DK
rachmaninoff_piano op42_01a 1: 3/4 1: -1 16.0 16.0 48.00 16.0 16.0 48.00 NaN 192.00 125.0 52.0 0.0 0.0 28.0 d 2.3.0 Amelia Brey DK
ravel_piano Ravel_-_Jeux_dEau 1: 4/4, 2: 2/4, 3: 4/4, 8: 2/4, 9: 4/4, 12: 1/... 1: 4 88.0 85.0 333.25 88.0 85.0 333.25 NaN 1143.40 4362.0 2599.0 0.0 0.0 257.0 E 2.1.0 Adrian Nagel NaN
scarlatti_sonatas K001 1: 4/4 1: -1 31.0 31.0 124.00 62.0 62.0 248.00 264.50 705.0 450.0 0.0 0.0 89.0 d 2.3.0 unknown (0.0.0), Davor Krkljus (2.3.0) DK, JH
schubert_winterreise n01 1: 2/4 1: -1, 71: 2, 99: -1 105.0 105.0 210.00 137.0 137.0 274.00 NaN 1088.75 2174.0 505.0 0.0 0.0 215.0 d 2.1.0 Alexander Faschon Johannes Hentschel
schulhoff_suite_dansante_en_jazz suite_dansante_en_jazz_1_stomp 1: 2/2 1: 0 46.0 46.0 184.00 46.0 46.0 184.00 505.83 706.0 317.0 0.0 0.0 96.0 E 2.3.0 Amelia Brey DK
schumann_kinderszenen n01 1: 2/4 1: 1 22.0 22.0 44.00 44.0 44.0 88.00 134.33 241.0 141.0 0.0 0.0 44.0 G 2.3.0 Tal Soker (2.1.1), John Heilig (2.3.0) AN, JHei, JH
schumann_liederkreis op39n01 1: 4/4 1: 3 28.0 28.0 112.00 28.0 28.0 112.00 NaN 301.25 663.0 433.0 0.0 0.0 47.0 f# 2.1.0 Uli Kneisel Adrian Nagel
sweelinck_keyboard SwWV258_fantasia_cromatica 1: 4/4 1: -1 196.0 196.0 784.00 196.0 196.0 784.00 2502.50 2639.0 1595.0 0.0 0.0 501.0 d 2.1.0 Adrian Nagel NaN
tchaikovsky_seasons op37a01 1: 3/4 1: 3, 29: 1, 63: 3 103.0 103.0 309.00 103.0 103.0 309.00 1058.17 1537.0 829.0 0.0 0.0 313.0 A 2.3.0 Adrian Nagel (2.1.1), John Heilig (2.3.0) Johannes Hentschel, AN
wagner_overtures WWV090_Tristan_01_Vorspiel-Prelude_Ricordi1888... 1: 6/8 1: 0, 44: 3, 72: 0 112.0 111.0 333.50 112.0 111.0 333.50 NaN 1224.50 1676.0 896.0 0.0 0.0 360.0 a 2.1.0 Adrian Nagel NaN
wf_bach_sonatas F001_n08a 1: 4/4 1: 0 63.0 63.0 252.00 126.0 126.0 504.00 602.75 1186.0 727.0 0.0 0.0 205.0 C 2.3.0 Christos Giannopoulos (1.0.0), Davor Krkljus (... DK, AN

All annotation labels from the selected pieces#

# The 'expanded' facet holds the harmony labels of all loaded pieces.
all_labels = dataset.data.get_facet('expanded')

print(f"{len(all_labels.index)} hand-annotated harmony labels:")
# Preview the first 20 rows; color_background (from utils) styles the 'chord' column.
all_labels.iloc[:20].style.apply(color_background, subset="chord")
232815 hand-annotated harmony labels:
      mc mn quarterbeats quarterbeats_all_endings duration_qb mc_onset mn_onset timesig staff voice label globalkey localkey pedal chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note alt_label volta pedalend placement
corpus piece i                                                                    
ABC n01op18-1_01 0 1 1 0 0 3.000000 0 0 3/4 4 1 F.I F I nan I nan I nan nan nan nan nan nan M False False (0, 4, 1) () 0 0 nan nan nan
1 2 2 3 3 3.000000 0 0 3/4 4 1 V F I nan V nan V nan nan nan nan nan nan M False False (1, 5, 2) () 1 1 nan nan nan
2 3 3 6 6 3.000000 0 0 3/4 4 1 I F I nan I nan I nan nan nan nan nan nan M False False (0, 4, 1) () 0 0 nan nan nan
3 4 4 9 9 6.000000 0 0 3/4 4 1 IV6 F I nan IV6 nan IV nan 6 nan nan nan nan M False False (3, 0, -1) () -1 3 nan nan nan
4 6 6 15 15 3.000000 0 0 3/4 4 1 V65 F I nan V65 nan V nan 65 nan nan nan nan Mm7 False False (5, 2, -1, 1) () 1 5 nan nan nan
5 7 7 18 18 1.000000 0 0 3/4 4 1 I F I nan I nan I nan nan nan nan nan nan M False False (0, 4, 1) () 0 0 nan nan nan
6 7 7 19 19 1.000000 1/4 1/4 3/4 4 1 vi F I nan vi nan vi nan nan nan nan nan nan m False False (3, 0, 4) () 3 3 nan nan nan
7 7 7 20 20 1.000000 1/2 1/2 3/4 4 1 ii6 F I nan ii6 nan ii nan 6 nan nan nan nan m False False (-1, 3, 2) () 2 -1 nan nan nan
8 8 8 21 21 2.000000 0 0 3/4 4 1 V(64) F I nan V(64) nan V nan nan 64 nan nan nan M False False (1, 0, 4) () 1 1 nan nan nan
9 8 8 23 23 1.000000 1/2 1/2 3/4 4 1 V\\ F I nan V nan V nan nan nan nan nan \\ M False False (1, 5, 2) () 1 1 nan nan nan
10 9 9 24 24 3.000000 0 0 3/4 4 1 I F I nan I nan I nan nan nan nan nan nan M False False (0, 4, 1) () 0 0 nan nan nan
11 10 10 27 27 3.000000 0 0 3/4 4 1 V F I nan V nan V nan nan nan nan nan nan M False False (1, 5, 2) () 1 1 nan nan nan
12 11 11 30 30 3.000000 0 0 3/4 4 1 I F I nan I nan I nan nan nan nan nan nan M False False (0, 4, 1) () 0 0 nan nan nan
13 12 12 33 33 6.000000 0 0 3/4 4 1 IV6 F I nan IV6 nan IV nan 6 nan nan nan nan M False False (3, 0, -1) () -1 3 nan nan nan
14 14 14 39 39 2.000000 0 0 3/4 4 1 #viio7(6)/vi F I nan #viio7(6)/vi nan #vii o 7 6 vi nan nan o7 False False (8, 5, 4, -1) () 8 8 nan nan nan
15 14 14 41 41 4.000000 1/2 1/2 3/4 4 1 #viio7/vi F I nan #viio7/vi nan #vii o 7 nan vi nan nan o7 False False (8, 5, 2, -1) () 8 8 nan nan nan
16 16 16 45 45 2.000000 0 0 3/4 4 1 #viio7(4)/ii F I nan #viio7(4)/ii nan #vii o 7 4 ii nan nan o7 False False (7, -1, 1, -2) () 7 7 nan nan nan
17 16 16 47 47 4.000000 1/2 1/2 3/4 4 1 #viio7/ii F I nan #viio7/ii nan #vii o 7 nan ii nan nan o7 False False (7, 4, 1, -2) () 7 7 nan nan nan
18 18 18 51 51 1.500000 0 0 3/4 4 1 ii6(11#7b6) F I nan ii6(11#7b6) nan ii nan 6 11#7b6 nan nan nan m False False (-1, -2, 7) (1,) 2 -1 nan nan nan
19 18 18 105/2 105/2 1.500000 3/8 3/8 3/4 4 1 ii6 F I nan ii6 nan ii nan 6 nan nan nan nan m False False (-1, 3, 2) () 2 -1 nan nan nan

Filtering out pieces without cadence annotations#

# Keep only pieces that contain cadence annotations.
hascadence = dc.HasCadenceAnnotationsFilter().process_data(dataset)
# The ungrouped piece indices are looked up under the empty-tuple key of `.indices`.
assert () in hascadence.indices and len(hascadence.indices[()]) > 0, "No cadences found."
print(f"Before: {len(dataset.indices[()])} pieces; after removing those without cadence labels: {len(hascadence.indices[()])}")
Before: 1222 pieces; after removing those without cadence labels: 853

Show corpora containing pieces with cadence annotations#

# Group the filtered pieces by corpus; `.indices` then maps a group key to that group's piece indices.
grouped_by_corpus = dc.CorpusGrouper().process_data(hascadence)
# The first element of each group key is used as the corpus name.
corpora = {group[0]: f"{len(ixs)} pieces" for group, ixs in  grouped_by_corpus.indices.items()}
print(f"{len(corpora)} corpora with {sum(map(len, grouped_by_corpus.indices.values()))} pieces containing cadence annotations:")
# Cell-final expression: display the corpus -> piece-count mapping.
corpora
27 corpora with 853 pieces containing cadence annotations:
{'bach_en_fr_suites': '89 pieces',
 'bach_solo': '32 pieces',
 'bartok_bagatelles': '12 pieces',
 'beethoven_piano_sonatas': '64 pieces',
 'c_schumann_lieder': '12 pieces',
 'chopin_mazurkas': '50 pieces',
 'corelli': '148 pieces',
 'couperin_clavecin': '9 pieces',
 'couperin_concerts': '84 pieces',
 'debussy_suite_bergamasque': '4 pieces',
 'dvorak_silhouettes': '12 pieces',
 'grieg_lyric_pieces': '65 pieces',
 'handel_keyboard': '6 pieces',
 'jc_bach_sonatas': '29 pieces',
 'liszt_pelerinage': '19 pieces',
 'mahler_kindertotenlieder': '5 pieces',
 'medtner_tales': '19 pieces',
 'mozart_piano_sonatas': '54 pieces',
 'peri_euridice': '6 pieces',
 'pleyel_quartets': '6 pieces',
 'poulenc_mouvements_perpetuels': '2 pieces',
 'rachmaninoff_piano': '17 pieces',
 'scarlatti_sonatas': '69 pieces',
 'schulhoff_suite_dansante_en_jazz': '6 pieces',
 'schumann_kinderszenen': '13 pieces',
 'tchaikovsky_seasons': '12 pieces',
 'wf_bach_sonatas': '9 pieces'}

All annotation labels from the selected pieces#

# From here on `all_labels` only covers the pieces that contain cadence annotations.
all_labels = hascadence.get_facet('expanded')

print(f"{len(all_labels.index)} hand-annotated harmony labels:")
# Preview the first 10 rows, skipping the first 13 columns.
all_labels.iloc[:10, 13:].style.apply(color_background, subset="chord")
138219 hand-annotated harmony labels:
      pedal chord numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta quarterbeats_all_endings special pedalend placement
corpus fname interval                                          
bach_en_fr_suites BWV806_01_Prelude [0.0, 1.5) nan I I nan nan nan nan nan { M False False (0, 4, 1) () 0 0 nan nan nan nan
[1.5, 3.0) nan V7 V nan 7 nan nan nan nan Mm7 False False (1, 5, 2, -1) () 1 1 nan nan nan nan
[3.0, 4.5) nan I I nan nan nan nan nan nan M False False (0, 4, 1) () 0 0 nan nan nan nan
[4.5, 6.0) nan V7 V nan 7 nan nan nan nan Mm7 False False (1, 5, 2, -1) () 1 1 nan nan nan nan
[6.0, 7.5) nan I I nan nan nan nan nan nan M False False (0, 4, 1) () 0 0 nan nan nan nan
[7.5, 9.0) nan V V nan nan nan nan nan nan M False False (1, 5, 2) () 1 1 nan nan nan nan
[9.0, 10.5) nan I I nan nan nan nan nan nan M False False (0, 4, 1) () 0 0 nan nan nan nan
[10.5, 12.0) nan I6 I nan 6 nan nan nan nan M False False (4, 1, 0) () 0 4 nan nan nan nan
[12.0, 17.5) nan I I nan nan nan nan nan }{ M False False (0, 4, 1) () 0 0 nan nan nan nan
[17.5, 18.0) nan IV IV nan nan nan nan nan nan M False False (-1, 3, 0) () -1 -1 nan nan nan nan

Metadata#

dataset_metadata = hascadence.data.metadata()
# Restrict the metadata to the pieces that passed the cadence filter.
hascadence_metadata = dataset_metadata.loc[hascadence.indices[()]]
# The first index level is renamed to 'dataset'; the bar-chart cell groups by that level name.
hascadence_metadata.index.rename('dataset', level=0, inplace=True)
hascadence_metadata.head()
TimeSig KeySig last_mc last_mn length_qb last_mc_unfolded last_mn_unfolded length_qb_unfolded volta_mcs all_notes_qb ... staff_18_ambitus staff_18_instrument staff_19_ambitus staff_19_instrument staff_20_ambitus staff_20_instrument staff_21_ambitus staff_21_instrument source.1 part_name_text
dataset piece
bach_en_fr_suites BWV806_01_Prelude 1: 12/8 1: 3 37.0 37.0 222.0 74.0 74.0 444.0 673.25 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
BWV806_02_Allemande 1: 4/4 1: 3 34.0 32.0 128.0 68.0 64.0 256.0 498.50 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
BWV806_03_Courante_I 1: 3/2 1: 3 22.0 20.0 120.0 44.0 40.0 240.0 381.00 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
BWV806_04_Courante_II 1: 3/2 1: 3 26.0 24.0 144.0 52.0 48.0 288.0 434.50 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
BWV806_05_Double_I 1: 3/2 1: 3 26.0 24.0 144.0 52.0 48.0 288.0 392.50 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 117 columns

# Mean of the `composed_end` year per corpus, truncated to int and sorted ascending.
# NOTE(review): .astype(int) raises if a corpus has no `composed_end` values at all (NaN mean).
mean_composition_years = hascadence_metadata.groupby(level=0).composed_end.mean().astype(int).sort_values()
# Corpus names ordered by mean composition year; reused by later plots.
chronological_order = mean_composition_years.index.to_list()
# One row per corpus: mean year and number of pieces.
bar_data = pd.concat([mean_composition_years.rename('year'), 
                      hascadence_metadata.groupby(level='dataset').size().rename('pieces')],
                     axis=1
                    ).reset_index()
fig = px.bar(bar_data, x='year', y='pieces', color='dataset', title='Pieces contained in the dataset')
# Fixed bar width of 5 in x-axis units (years).
fig.update_traces(width=5)

Overall#

  • PAC: Perfect Authentic Cadence

  • IAC: Imperfect Authentic Cadence

  • HC: Half Cadence

  • DC: Deceptive Cadence

  • EC: Evaded Cadence

  • PC: Plagal Cadence

# Count the rows carrying a cadence label, then tabulate them by label via value_count_df (from utils).
print(f"{all_labels.cadence.notna().sum()} cadence labels.")
value_count_df(all_labels.cadence)
8702 cadence labels.
counts %
cadence
PAC 4226 0.485635
HC 2317 0.266261
IAC 1456 0.167318
EC 255 0.029304
DC 163 0.018731
HC.SIM 109 0.012526
PC 85 0.009768
HC.CON 39 0.004482
HC.PHR 37 0.004252
HC.TEN 15 0.001724
# Pie chart over all rows with a cadence label; slice colours come from CADENCE_COLORS.
px.pie(all_labels[all_labels.cadence.notna()], names="cadence", color="cadence", color_discrete_map=CADENCE_COLORS)

Per dataset#

# Absolute number of each cadence label per corpus (rows without a cadence label are not counted).
cadence_count_per_dataset = all_labels.groupby("corpus").cadence.value_counts()
# Normalise by the per-corpus total so that the fractions of every corpus sum to 1.
cadence_fraction_per_dataset = cadence_count_per_dataset / cadence_count_per_dataset.groupby(level=0).sum()
# The x-axis column is called 'corpus', so category_orders must be keyed by 'corpus'
# (a key that matches no column is ignored and the chronological order is lost).
px.bar(cadence_fraction_per_dataset.rename('count').reset_index(), x='corpus', y='count', color='cadence',
      color_discrete_map=CADENCE_COLORS, category_orders=dict(corpus=chronological_order))
# One pie per corpus (facets wrapped after 4 columns) showing the absolute cadence counts.
fig = px.pie(cadence_count_per_dataset.rename('count').reset_index(), names='cadence', color='cadence', values='count', 
       facet_col='corpus', facet_col_wrap=4, height=2000, color_discrete_map=CADENCE_COLORS)
# Facet titles read "corpus=<name>"; keep only the part after the last '='.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(**STD_LAYOUT)

Per phrase#

Number of cadences per phrase#

# Slice the corpus-grouped data into phrases and fetch the per-phrase segments of the 'expanded' facet.
segmented = dc.PhraseSlicer().process_data(grouped_by_corpus)
phrases = segmented.get_slice_info()
phrase_segments = segmented.get_facet("expanded")
# The first three index levels identify one phrase.
phrase_gpb = phrase_segments.groupby(level=[0,1,2])
# Distinct local keys occurring within each phrase, as tuples, and how many there are.
local_keys_per_phrase = phrase_gpb.localkey.unique().map(tuple)
n_local_keys_per_phrase = local_keys_per_phrase.map(len)
phrases_with_keys = pd.concat([n_local_keys_per_phrase.rename('n_local_keys'),
                               local_keys_per_phrase.rename('local_keys'),
                               phrases], axis=1)
# NOTE(review): `nunique` counts DISTINCT cadence types per phrase, so a phrase containing
# the same cadence type twice is reported with n_cadences == 1.
phrases_with_cadences = pd.concat([
    phrase_gpb.cadence.nunique().rename('n_cadences'),
    # Distinct cadence types per phrase in order of first appearance, with missing values removed.
    phrase_gpb.cadence.unique().rename('cadences').map(lambda l: tuple(e for e in l if not pd.isnull(e))),
    phrases_with_keys
], axis=1)
value_count_df(phrases_with_cadences.n_cadences, counts="#phrases")
#phrases %
n_cadences
1 7892 0.808607
0 1575 0.161373
2 280 0.028689
3 13 0.001332
# Per corpus: how many phrases contain 0, 1, 2, ... distinct cadence types.
n_cad = phrases_with_cadences.groupby(level='corpus').n_cadences.value_counts().rename('counts').reset_index().sort_values('n_cadences')
# Cast to str so that plotly treats the number of cadences as a discrete colour category.
n_cad.n_cadences = n_cad.n_cadences.astype(str)
# The x-axis column is called 'corpus', so category_orders must be keyed by 'corpus'
# (a key that matches no column is ignored and the chronological order is lost).
fig = px.bar(n_cad, x='corpus', y='counts', color='n_cadences', height=800, barmode='group',
             labels=dict(n_cadences="#cadences in a phrase"),
             category_orders=dict(corpus=chronological_order)
      )
fig.show()

Combinations of cadence types for phrases with more than one cadence#

# Frequency of each tuple of distinct cadence types among phrases with more than one type.
value_count_df(phrases_with_cadences[phrases_with_cadences.n_cadences > 1].cadences)
counts %
cadences
(EC, PAC) 70 0.238908
(DC, PAC) 64 0.218430
(HC, PAC) 54 0.184300
(IAC, PAC) 27 0.092150
(EC, HC) 15 0.051195
(PAC, HC) 9 0.030717
(DC, HC) 6 0.020478
(HC, DC, PAC) 5 0.017065
(IAC, HC) 5 0.017065
(DC, IAC) 4 0.013652
(HC, IAC) 4 0.013652
(EC, IAC) 3 0.010239
(PAC, IAC) 3 0.010239
(IAC, EC) 3 0.010239
(HC, EC) 3 0.010239
(DC, PC) 2 0.006826
(PAC, DC) 2 0.006826
(PC, PAC) 2 0.006826
(DC, EC, PAC) 2 0.006826
(HC, DC) 1 0.003413
(IAC, DC, PAC) 1 0.003413
(IAC, HC, PAC) 1 0.003413
(DC, HC, PAC) 1 0.003413
(EC, PC) 1 0.003413
(HC, IAC, PAC) 1 0.003413
(PAC, PC) 1 0.003413
(HC, PC) 1 0.003413
(EC, DC, PAC) 1 0.003413
(DC, EC, IAC) 1 0.003413

Positioning of cadences within phrases#

# Collect one scatter point per cadence label plus one "end of phrase" marker per phrase.
# Each tuple is (row on the y-axis, x position in quarterbeats from phrase start, marker type, hover text).
df_rows = []
y_position = 0
# Phrases with at least one cadence, ordered by `duration_qb` (shortest first).
for ix in phrases_with_cadences[phrases_with_cadences.n_cadences > 0].sort_values('duration_qb').index:
    df = phrase_segments.loc[ix]
    description = str(ix)
    if df.cadence.notna().any():
        # The third index level is the interval covered by the phrase.
        interval = ix[2]
        df_rows.append((y_position, interval.length, "end of phrase", description))
        start_pos = interval.left
        cadences = df.loc[df.cadence.notna(), ['quarterbeats', 'cadence']]
        # Express cadence positions relative to the beginning of the phrase.
        cadences.quarterbeats -= start_pos
        for cadence_x, cadence_type in cadences.itertuples(index=False, name=None):
            df_rows.append((y_position, cadence_x, cadence_type, description))
        # Only phrases that were actually plotted advance the y position.
        y_position += 1
    #else:
    #    df_rows.append((y_position, pd.NA, pd.NA, description))
    
data = pd.DataFrame(df_rows, columns=["phrase_ix", "x", "marker", "description"])
fig = px.scatter(data[data.x.notna()], x='x', y="phrase_ix", color="marker", hover_name="description", height=3000,
                labels=dict(marker='legend'), color_discrete_map=CADENCE_COLORS)
fig.update_traces(marker_size=5)
# Reverse the y-axis so that the first (shortest) phrase is drawn at the top.
fig.update_yaxes(autorange="reversed")
fig.show()

Cadence ultima#

# Re-fetch the phrase segments and locate cadence rows for which no chord has been expanded.
phrase_segments = segmented.get_facet("expanded")
cadence_selector = phrase_segments.cadence.notna()
missing_chord_selector = phrase_segments.chord.isna()
cadence_with_missing_chord_selector = cadence_selector & missing_chord_selector
# NOTE(review): `missing` is not used again in the visible part of the notebook.
missing = phrase_segments[cadence_with_missing_chord_selector]
# Run ms3's label expansion on exactly those rows and write the result back in place.
expanded = ms3.expand_dcml.expand_labels(phrase_segments[cadence_with_missing_chord_selector], propagate=False, chord_tones=True, skip_checks=True)
phrase_segments.loc[cadence_with_missing_chord_selector] = expanded
# Cadence rows that still have no bass note after the expansion.
print(f"Ultima harmony missing for {(phrase_segments.cadence.notna() & phrase_segments.bass_note.isna()).sum()} cadence labels.")
Ultima harmony missing for 53 cadence labels.

Ultimae as Roman numeral#

def highlight(row, color="#ffffb3"):
    """Return per-cell CSS that highlights a row whose ``counts`` is at least 10.

    Written for ``Styler.apply(highlight, axis=1)`` on a four-column frame,
    hence the fixed length of the returned list.

    Args:
        row: Object exposing a numeric ``counts`` attribute (e.g. a DataFrame row).
        color: CSS colour used as background for highlighted rows.

    Returns:
        A list of four entries: ``None`` each if ``row.counts < 10``, otherwise
        the CSS rule ``"background-color: <color>;"`` four times.
    """
    if row.counts < 10:
        return [None] * 4
    # The f-prefix is required; without it the literal text "{color}" would be emitted.
    return [f"background-color: {color};"] * 4

# Overall frequency of each cadence label; its index (most frequent first) orders the facet rows.
cadence_counts = all_labels.cadence.value_counts()
# Count the numerals of cadence rows, separately for major and minor local keys and per cadence type.
ultima_root = phrase_segments.groupby(['localkey_is_minor', 'cadence']).numeral.value_counts().rename('counts').to_frame().reset_index()
# Replace the boolean by a readable facet title.
ultima_root.localkey_is_minor = ultima_root.localkey_is_minor.map({False: 'in major', True: 'in minor'})
#ultima_root.style.apply(highlight, axis=1)
# Grid of pies: one row per cadence type, one column per mode.
fig = px.pie(ultima_root, names='numeral', values='counts', 
             facet_row='cadence', facet_col='localkey_is_minor', 
             height=1500,
             category_orders={'cadence': cadence_counts.index},
            )
# Facet titles read "<column>=<value>"; keep only the value.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(**STD_LAYOUT)
fig.show()
#phrase_segments.groupby(level=[0,1,2], group_keys=False).apply(lambda df: df if ((df.cadence == 'PAC') & (df.numeral == 'V')).any() else None)

Ultimae bass note as scale degree#

# Count the bass notes of cadence rows, separately for major and minor local keys and per cadence type.
ultima_bass = phrase_segments.groupby(['localkey_is_minor','cadence']).bass_note.value_counts().rename('counts').reset_index()
# Apply ms3.fifths2sd row-wise, feeding `bass_note` as `fifths` and `localkey_is_minor` as `minor`.
# NOTE(review): presumably converts fifths-encoded bass notes to scale-degree strings -- confirm in ms3 docs.
ultima_bass.bass_note = ms3.transform(ultima_bass, ms3.fifths2sd, dict(fifths='bass_note', minor='localkey_is_minor'))
# Replace the boolean by a readable facet title.
ultima_bass.localkey_is_minor = ultima_bass.localkey_is_minor.map({False: 'in major', True: 'in minor'})
#ultima_bass.style.apply(highlight, axis=1)
# Grid of pies: one row per cadence type, one column per mode.
fig = px.pie(ultima_bass, names='bass_note', values='counts', 
             facet_row='cadence', facet_col='localkey_is_minor', 
             height=1500, 
             category_orders={'cadence': cadence_counts.index},
            )
# Facet titles read "<column>=<value>"; keep only the value.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(**STD_LAYOUT)
fig.show()

Chord progressions#

PACs with ultima I/i#

def remove_immediate_duplicates(l):
    """Collapse runs of equal neighbouring elements into a single element.

    Args:
        l: Any iterable (tuple, list, Series, ...). Elements are compared with ``!=``.

    Returns:
        A tuple with the elements of ``l`` in order, where every element that
        equals its direct predecessor has been dropped.
    """
    # A private sentinel marks "no predecessor yet", so that a leading None
    # in the input is kept rather than mistaken for the start marker.
    no_predecessor = object()
    previous = no_predecessor
    result = []
    for element in l:
        if previous is no_predecessor or element != previous:
            result.append(element)
        previous = element
    return tuple(result)

def get_progressions(selected='PAC', last_row=None, feature='chord', dataset=None, as_series=True,
                     remove_duplicates=False, segments=None):
    """Collect, for every cadence of one type, the feature values leading up to it.

    Within each phrase, the rows are split into runs that each end on a cadence
    label; rows after the last cadence label of a phrase are discarded. For every
    run ending on ``selected`` one progression is returned.

    Args:
        selected: Cadence label to look for, e.g. ``'PAC'``.
        last_row: Optional mapping ``column -> value or tuple of values``. A
            progression is kept only if, for every column, the value in the
            cadence row is among the given values.
        feature: Column whose values make up the progressions. Rows where this
            column is missing are ignored.
        dataset: If given, only corpora whose name contains this string are used.
        as_series: Return a ``pd.Series`` (object dtype) instead of a list.
        remove_duplicates: Collapse immediately repeated values in each progression.
        segments: DataFrame of phrase segments whose first three index levels
            identify a phrase. Defaults to the notebook-level ``phrase_segments``.

    Returns:
        The progressions as tuples, in a ``pd.Series`` or a list.
    """
    if segments is None:
        segments = phrase_segments
    # Normalise every condition to a tuple of admissible values.
    conditions = {}
    if last_row is not None:
        conditions = {col: val if isinstance(val, tuple) else (val,) for col, val in last_row.items()}
    progressions = []

    annotated = segments[segments[feature].notna()]
    for (corp, *_), df in annotated.groupby(level=[0, 1, 2]):
        if dataset is not None and dataset not in corp:
            continue
        if not (df.cadence == selected).fillna(False).any():
            continue
        # remove chords after the last cadence label
        df = df[df.cadence.bfill().notna()]
        # group segments leading up to a cadence label: a new group starts on
        # the row following each cadence label
        cadence_groups = df.cadence.notna().shift(fill_value=False).cumsum()
        for _, group in df.groupby(cadence_groups):
            cadence_row = group.iloc[-1]
            if cadence_row['cadence'] != selected:
                continue
            if any(cadence_row[col] not in allowed for col, allowed in conditions.items()):
                continue
            if remove_duplicates:
                # Pass a tuple so the helper works irrespective of the container type it expects.
                progressions.append(remove_immediate_duplicates(tuple(group[feature].to_list())))
            else:
                progressions.append(tuple(group[feature]))
    if as_series:
        return pd.Series(progressions, dtype='object')
    return progressions
# Chord progressions leading to every PAC whose cadence row has the numeral I or i.
chord_progressions = get_progressions('PAC', dict(numeral=('I', 'i')), 'chord')
print(f"Progressions for {len(chord_progressions)} cadences:")
value_count_df(chord_progressions, "chord progressions")
Progressions for 4176 cadences:
counts %
chord progressions
(I, V7, I) 15 0.003592
(I, V(64), V7, I) 13 0.003113
(V, V7, I, ii6(2), ii6, V7, I) 12 0.002874
(I, I6, IV, V(64), V, I) 10 0.002395
(I, IV, V(64), V, I) 9 0.002155
... ... ...
(i/v, IV6(2), IV6, viio, i(4), i, i6, IV6(2)/VI, IV6/VI, viio/VI, VI(4), VI, i6, IV, V(4), V, i) 1 0.000239
(i, V, i/v, V(4)/v, V/v, i/v) 1 0.000239
(i/v, V/v, #viio, i(9), i, V(4), V, i) 1 0.000239
(i, V7, i, V7, I, I6, IV(9), IV, ii6, V7, vi, ii65, V, I) 1 0.000239
(I, I64, I6, IVM7, ii6, ii, iii7, I6, V(64), V, I) 1 0.000239

3548 rows × 2 columns

# Same selection as for the chord progressions, but collecting only the `numeral` column.
numeral_progressions = get_progressions('PAC', dict(numeral=('I', 'i')), 'numeral')
value_count_df(numeral_progressions, "numeral progressions")
counts %
numeral progressions
(I, V, V, I) 28 0.006705
(I, IV, V, V, I) 22 0.005268
(I, V, I) 20 0.004789
(I, ii, V, V, I) 15 0.003592
(I, V, I, V, I, V, I, V, I) 14 0.003352
... ... ...
(i, iv, VII, III, VI, ii, i, V, i) 1 0.000239
(iv, VII, III, VI, ii, i, V, i) 1 0.000239
(i, VI, v, v, iv, iv, V, V, V, V, i) 1 0.000239
(i, V, i, V, #vii, III, i, iv, V, i, V, V, i) 1 0.000239
(i, V, III, i, V, V, i, v, iv, iv, V, i, ii, V, i) 1 0.000239

3261 rows × 2 columns

# Collapse immediate repetitions of the same numeral before counting the progressions again.
numeral_prog_no_dups = numeral_progressions.map(remove_immediate_duplicates)
value_count_df(numeral_prog_no_dups)
counts %
(I, V, I) 85 0.020354
(I, IV, V, I) 61 0.014607
(I, ii, V, I) 45 0.010776
(I, V, I, V, I) 36 0.008621
(I, V, I, ii, V, I) 33 0.007902
... ... ...
(V, iv, V, iv, i, #vii, i, V, i) 1 0.000239
(i, V, #vii, i, ii, iv, V, i, V, iv, V, i, iv, V, i) 1 0.000239
(vi, i, ii, #vii, i, iv, V, iv, V, i) 1 0.000239
(i, V, i, ii, v, iv, I, ii, V, I) 1 0.000239
(I, IV, V, IV, vii, V, I, IV, I, ii, V, I, IV, V, I) 1 0.000239

2790 rows × 2 columns

PACs ending on scale degree 1#

Scale degrees are expressed with respect to the major scale, regardless of the actual (major or minor) key.

# Bass-note progressions leading to every PAC whose cadence row has bass_note == 0.
bass_progressions = get_progressions('PAC', dict(bass_note=0), 'bass_note')
# ms3.fifths2sd is called without a `minor` argument here.
# NOTE(review): presumably this is why degrees are expressed relative to the major scale -- confirm in ms3 docs.
bass_prog = bass_progressions.map(ms3.fifths2sd)
print(f"Progressions for {len(bass_progressions)} cadences:")
value_count_df(bass_prog, "bass progressions")
Progressions for 3825 cadences:
counts %
bass progressions
(1, 4, 5, 5, 1) 31 0.008105
(1, 5, 1) 28 0.007320
(1, 5, 5, 1) 20 0.005229
(1, 3, 4, 5, 5, 1) 17 0.004444
(1, 2, 3, 4, 5, 5, 1) 14 0.003660
... ... ...
(5, 7, 2, 5, 1, 4, 7, 3, 6, 2, 5, 1) 1 0.000261
(6, 7, 1, 4, 5, 1, 4, 4, 5, 3, 2, 5, 1, 4, 4, 5, 3, 2, 5, 1) 1 0.000261
(1, 7, 6, 6, 5, 5, 4, 3, 2, 1) 1 0.000261
(5, 5, 4, 4, 3, 3, 2, 2, 1) 1 0.000261
(1, 2, 1, 2, 2, 4, 6, 6, 5, 5, #4, #4, #4, #4, #4, 4, 4, 3, 5, 5, 5, 1) 1 0.000261

2970 rows × 2 columns

# Collapse immediate repetitions of the same scale degree before counting the progressions again.
bass_prog_no_dups = bass_prog.map(remove_immediate_duplicates)
value_count_df(bass_prog_no_dups)
counts %
(1, 5, 1) 76 0.019869
(1, 4, 5, 1) 57 0.014902
(1, 5, 1, 5, 1) 40 0.010458
(1, 2, 3, 4, 5, 1) 29 0.007582
(1, 3, 4, 5, 1) 27 0.007059
... ... ...
(1, 5, 2, 1, 5, 4, b3, 2, b3, 4, 5, 1) 1 0.000261
(1, 7, b7, 6, b6, 5, 1) 1 0.000261
(1, 5, 2, 7, 1, 5, 1) 1 0.000261
(b3, 4, 5, 1, 2, b3, 4, 5, 1) 1 0.000261
(#5, 6, #5, 6, #5, 6, 7, 1, 7, 1, 7, 1, b3, 4, 5, 1, 5, 1) 1 0.000261

2671 rows × 2 columns

def make_sankey(data, labels, node_pos=None, margin={'l': 10, 'r': 10, 'b': 10, 't': 10}, pad=20, color='auto', **kwargs):
    """Build a plotly Sankey figure from a link table and a list of node labels.

    Args:
        data: Object exposing ``source``, ``target`` and ``value`` attributes
            (e.g. a DataFrame with these columns). They are passed unchanged
            to the Sankey ``link`` definition.
        labels: Sequence of hashable node labels; position ``i`` labels node ``i``.
        node_pos: Optional container of ``(x, y)`` pairs looked up by node
            index. Nodes for which ``i in node_pos`` is false get the
            coordinate 0. If None, no coordinates are passed to plotly.
        margin: Passed to ``fig.update_layout(margin=...)``. The default dict
            is only read here, never mutated.
        pad: Node padding passed to the Sankey ``node`` definition.
        color: ``'auto'`` assigns one evenly spaced HSV hue per distinct label
            (identical labels share a colour); any other value is passed
            through to the Sankey ``node`` definition as is.
        **kwargs: Forwarded to ``fig.update_layout``.

    Returns:
        The ``go.Figure`` containing the Sankey trace.
    """
    # isinstance guard: comparing an array-like ``color`` with == would not
    # yield a plain boolean.
    if isinstance(color, str) and color == 'auto':
        # dict.fromkeys de-duplicates while keeping first-occurrence order, so
        # the label -> hue assignment is reproducible between runs (iterating a
        # set of strings is not, because of string hash randomisation).
        unique_labels = list(dict.fromkeys(labels))
        if unique_labels:
            color_step = 100 / len(unique_labels)
            unique_colors = {
                label: f'hsv({round(i*color_step)}%,100%,100%)'
                for i, label in enumerate(unique_labels)
            }
            color = [unique_colors[label] for label in labels]
        else:
            # No nodes at all: nothing to colour (avoids dividing by zero).
            color = []

    if node_pos is None:
        node_x = node_y = None
    else:
        node_x = [node_pos[i][0] if i in node_pos else 0 for i in range(len(labels))]
        node_y = [node_pos[i][1] if i in node_pos else 0 for i in range(len(labels))]

    fig = go.Figure(go.Sankey(
        arrangement='snap',
        node=dict(
            pad=pad,
            # Optional node settings left at plotly's defaults:
            #thickness = 20,
            #line = dict(color = "black", width = 0.5),
            label=labels,
            x=node_x,
            y=node_y,
            color=color,
        ),
        link=dict(
            source=data.source,
            target=data.target,
            value=data.value,
        ),
    ))

    fig.update_layout(margin=margin, **kwargs)
    return fig

def progressions2graph_data(progressions, cut_at_stage=None):
    """Convert progressions into stage-wise nodes and weighted edges.

    Every progression is traversed backwards, so stage 0 is its final element,
    stage 1 the element before it, and so on. Each distinct (stage, element)
    combination receives its own integer node ID, assigned in order of first
    appearance. Each pair of adjacent elements contributes 1 to the weight of
    the edge pointing from the earlier element's node (higher stage) to the
    later element's node (lower stage).

    Args:
        progressions: Iterable of reversible sequences (e.g. tuples) of
            hashable elements.
        cut_at_stage: Highest stage to include (inclusive), i.e. at most
            ``cut_at_stage + 1`` elements counted from the end of each
            progression are used. ``0`` keeps only the final element;
            ``None`` (default) keeps everything.

    Returns:
        Tuple ``(stage_nodes, edge_weights)`` where ``stage_nodes`` is a
        defaultdict ``{stage: {element: node_id}}`` and ``edge_weights`` is a
        Counter ``{(source_node, target_node): count}``.
    """
    stage_nodes = defaultdict(dict)
    edge_weights = Counter()
    node_counter = 0
    for progression in progressions:
        previous_node = None
        for stage, current in enumerate(reversed(progression)):
            # Test against None explicitly: a plain truthiness check would
            # treat cut_at_stage=0 as "no cut" instead of "final stage only".
            if cut_at_stage is not None and stage > cut_at_stage:
                break
            nodes = stage_nodes[stage]
            if current not in nodes:
                nodes[current] = node_counter
                node_counter += 1
            current_node = nodes[current]
            if previous_node is not None:
                # Edge runs in musical order: earlier element -> later element.
                edge_weights[(current_node, previous_node)] += 1
            previous_node = current_node
    return stage_nodes, edge_weights

def graph_data2sankey(stage_nodes, edge_weights):
    """Draw a Sankey diagram from stage-wise nodes and weighted edges.

    Args:
        stage_nodes: Mapping ``{stage: {label: node_id}}``. The node IDs are
            expected to be exactly 0..n-1, since labels are looked up for
            every index in that range.
        edge_weights: Mapping ``{(source_node, target_node): weight}``.

    Returns:
        Whatever ``make_sankey`` returns for the assembled link table and
        label list.
    """
    link_rows = [
        (source, target, weight)
        for (source, target), weight in edge_weights.items()
    ]
    links = pd.DataFrame(link_rows, columns=['source', 'target', 'value'])

    # Invert the per-stage {label: node_id} mappings into one {node_id: label}.
    label_by_node = {}
    for nodes in stage_nodes.values():
        for label, node_id in nodes.items():
            label_by_node[node_id] = label

    # Order labels by node ID so that position i labels node i.
    ordered_labels = [label_by_node[node_id] for node_id in range(len(label_by_node))]
    return make_sankey(links, ordered_labels)

def plot_progressions(progressions, cut_at_stage=None):
    """Plot progressions as a Sankey diagram.

    Chains ``progressions2graph_data`` (which receives ``cut_at_stage``) and
    ``graph_data2sankey`` and returns the latter's result.
    """
    graph_data = progressions2graph_data(progressions, cut_at_stage=cut_at_stage)
    return graph_data2sankey(*graph_data)

Chordal roots for the last 4 stages#

# Stages are counted backwards from the end of each progression (stage 0 is the
# final element) and cut_at_stage is inclusive, so this draws stages 0 through 3.
plot_progressions(numeral_prog_no_dups, cut_at_stage=3)

Complete chords for the last five stages in major#

# NOTE(review): get_progressions is defined outside this section; presumably this
# selects 'PAC' cadences on numeral 'I' in major local keys and collects the
# 'chord' values leading up to each of them -- confirm against its definition.
pac_major = get_progressions('PAC', dict(numeral='I', localkey_is_minor=False), 'chord')
# cut_at_stage is inclusive and counted from the end: stages 0 through 4 are drawn.
plot_progressions(pac_major, cut_at_stage=4)

Bass degrees for the last 8 stages#

# cut_at_stage is inclusive and counted from the end of each progression
# (stage 0 is the final element), so this draws stages 0 through 7.
plot_progressions(bass_prog_no_dups, cut_at_stage=7)

Bass degrees without accidentals#

def remove_sd_accidentals(t):
    """Return a tuple holding only the last character of every item in ``t``.

    For scale-degree strings such as ``'#4'`` or ``'b3'`` this drops the
    accidental prefix and keeps the degree digit.
    """
    return tuple(scale_degree[-1] for scale_degree in t)
                  
# Accidentals are stripped first and remove_immediate_duplicates is applied
# afterwards, so neighbours that differed only by accidental (e.g. 4 and #4)
# are identical by the time that helper sees them (presumably they are then
# merged -- the helper is defined outside this section).
bass_prog_no_acc_no_dup = bass_prog.map(remove_sd_accidentals).map(remove_immediate_duplicates)
# cut_at_stage is inclusive and counted from the end: stages 0 through 7 are drawn.
plot_progressions(bass_prog_no_acc_no_dup, cut_at_stage=7)

HCs ending on V#

# NOTE(review): get_progressions is defined outside this section; presumably this
# selects 'HC' cadences on numeral 'V' and collects the 'bass_note' values
# leading up to each of them -- confirm against its definition.
# The result is converted with ms3.fifths2sd right away.
half = get_progressions('HC', dict(numeral='V'), 'bass_note').map(ms3.fifths2sd)
print(f"Progressions for {len(half)} cadences:")
# cut_at_stage is inclusive and counted from the end: stages 0 through 5 are drawn.
plot_progressions(half.map(remove_immediate_duplicates), cut_at_stage=5)
Progressions for 2222 cadences: